last updated: 2023-11-15
Install Packages
As usual, make sure we have the right packages for this exercise
if (!require("pacman")) install.packages("pacman"); library(pacman)
## Loading required package: pacman
# let's load all of the files we were using and want to have again today
p_load("tidyverse", "knitr", "readr",
"pander", "BiocManager",
"dplyr", "stringr",
"statmod", # required dependency, need to load manually on some macOS versions.
"Glimma", # beautifies limma results
"purrr", # for working with lists (beautify column names)
"reactable") # for pretty tables.
# for today:
# p_load("WGCNA") # WGCNA is available on CRAN
if (!require("WGCNA")) BiocManager::install("WGCNA", force=TRUE); library(WGCNA)
## Loading required package: WGCNA
## Loading required package: dynamicTreeCut
## Loading required package: fastcluster
##
## Attaching package: 'fastcluster'
## The following object is masked from 'package:stats':
##
## hclust
##
##
## Attaching package: 'WGCNA'
## The following object is masked from 'package:stats':
##
## cor
#
# # We also need these Bioconductor packages today.
p_load("edgeR")
p_load("matrixStats")
p_load("org.Sc.sgd.db")
p_load("AnnotationDbi")
p_load("cowplot")
p_load("igraph")
Use limma to identify differential patterns of proteomic and transcriptomic changes in a time series heat shock experiment.
At the end of this exercise, you should be able to:
We are downloading the expected raw counts from a Github
repository using the code below. Just as in previous exercises, assign
the data to the variable counts. You can change the file
path if you have saved it to your computer in a different
location.
# Load in raw counts for all samples
counts <- read.delim('https://github.com/clstacy/GenomicDataAnalysis_Fa23/raw/main/data/YPS606_TF_Rep1_4_ExpectedCounts.txt',
sep = "\t",
header = T,
row.names = 1 # convert first column to rownames
)
# tidy raw counts to visualize distributions
tidy_counts <- counts %>%
rownames_to_column("gene_id") %>%
tidyr::pivot_longer(
., # The dot is the the input data, magrittr tutorial
col = -gene_id
) %>%
separate_wider_delim(name, delim = "_", names = c("strain", "stress", "replicate"),cols_remove = FALSE)
(
p <- tidy_counts %>%
ggplot(., aes(x = replicate, y = value)) + # x = treatment, y = RNA Seq count
geom_violin() + # violin plot, show distribution
geom_point(alpha = 0.2) + # add transparent points
theme_bw() +
theme(
axis.text.x = element_text(angle = 90) # Rotate treatment text
) +
labs(x = "Treatment Groups", y = "RNA Seq Counts") +
facet_grid(cols = vars(stress), rows = vars(strain), drop = TRUE, scales = "free_x") + # Facet by condition
# scale_y_log10() +
ggtitle("Raw count distributions")
)
Let’s use edgeR to normalize the raw counts
# read into a DGElist
y <- edgeR::DGEList(counts,
group=as.factor(
# remove replicate from name
sub("_[^_]+$", "", colnames(counts))
),
genes = rownames(counts)
)
# filter low counts
keep <- rowSums(cpm(y) > 1) >= 4
y <- y[keep,]
# normalize with cpm
normalized_counts <- cpm(y,
log = TRUE)
str(normalized_counts)
## num [1:5756, 1:48] 4.59 5.07 10.95 0.18 8.03 ...
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:5756] "YAL001C" "YAL002W" "YAL003W" "YAL004W" ...
## ..$ : chr [1:48] "YPS606_Mock_Rep1" "YPS606_Mock_Rep2" "YPS606_Mock_Rep3" "YPS606_Mock_Rep4" ...
Now that we’ve normalized, let’s look at the distributions again
# create data
tidy_normalized_counts <- normalized_counts %>%
data.frame() %>%
rownames_to_column("gene_id") %>%
tidyr::pivot_longer(
., # The dot is the the input data, magrittr tutorial
col = -gene_id
) %>%
separate_wider_delim(name, delim = "_", names = c("strain", "stress", "replicate"),cols_remove = FALSE)
(
p_normalized <- tidy_normalized_counts %>%
ggplot(., aes(x = replicate, y = value)) + # x = treatment, y = RNA Seq count
geom_violin() + # violin plot, show distribution
geom_point(alpha = 0.2) + # add transparent points
theme_bw() +
theme(
axis.text.x = element_text(angle = 90) # Rotate treatment text
) +
labs(x = "Treatment Groups", y = "normalized expression") +
facet_grid(cols = vars(stress),rows = vars(strain), drop = TRUE, scales = "free_x") + # Facet by condition
# scale_y_log10() +
ggtitle("Normalized CPM distribution")
)
For WGCNA, we don’t want to correlate all of the genes because (1) computational limitation and (2) genes that vary a small amount increase the number of variables in the model, making it harder to be confident in the patterns we do see. One way to account for this is by subsetting our entire gene list.
One common approach for subsetting genes is taking genes whose variance is greater than the \(n^{th}\) quantile of gene varainces. We’ll do that now:
# get the variance for each row (gene)
rv_wpn <- matrixStats::rowVars(normalized_counts)
# see a summary of the quantiles of variances
summary(rv_wpn)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.01052 0.14640 0.30679 0.78837 0.74320 20.77257
# get variance corresponding to 75th percentile
q75_wpn <- quantile( matrixStats::rowVars(normalized_counts), .75) # common cutoff
# # get variance corresponding to 95th percentile
# q95_wpn <- quantile( matrixStats::rowVars(normalized_counts), .95) # <= changed to 95 quantile
# to reduce dataset, only get genes with variance greater than above
normalized_counts_variable_genes <- normalized_counts[ rv_wpn > q75_wpn, ]
str(normalized_counts_variable_genes)
## num [1:1439, 1:48] 0.18 8.03 2.32 -2.21 5.16 ...
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:1439] "YAL004W" "YAL005C" "YAL008W" "YAL016C-A" ...
## ..$ : chr [1:48] "YPS606_Mock_Rep1" "YPS606_Mock_Rep2" "YPS606_Mock_Rep3" "YPS606_Mock_Rep4" ...
Notice the number of genes retained is much fewer now.
What are the distributions of counts of the normalized cpm’s of the retained genes?
tidy_normalized_counts_variable_genes <- normalized_counts_variable_genes %>%
data.frame() %>%
rownames_to_column("gene_id") %>%
pivot_longer(-gene_id) %>%
separate_wider_delim(name, delim = "_", names = c("strain", "stress", "replicate"),cols_remove = FALSE)
(
p_normalized_varGenes <- tidy_normalized_counts_variable_genes %>%
ggplot(., aes(x = replicate, y = value)) + # x = treatment, y = RNA Seq count
geom_violin() + # violin plot, show distribution
geom_point(alpha = 0.2) + # add transparent points
theme_bw() +
theme(
axis.text.x = element_text(angle = 90) # Rotate treatment text
) +
labs(x = "Treatment Groups", y = "normalized expression") +
facet_grid(cols = vars(stress),rows = vars(strain), drop = TRUE, scales = "free_x") + # Facet by condition
# scale_y_log10() +
ggtitle("Normalized CPM distribution")
)
For the analyses we’ve done so far, we’ve needed the columns to
correspond to samples and the rows to correspond to genes. WGCNA expects
the opposite, where columns are genes and rows are samples. We can use
the transpose function in r t() in order to convert our
data. Let’s do that now:
# transpose our normalized count matrix and save to new object
variable_genes_matrix = t(normalized_counts_variable_genes)
# Look at first 5 rows and 10 columns
variable_genes_matrix[1:5,1:10]
## YAL004W YAL005C YAL008W YAL016C-A YAL017W YAL025C
## YPS606_Mock_Rep1 0.1800382 8.028244 2.324995 -2.2084931 5.159889 6.894753
## YPS606_Mock_Rep2 -1.4421388 7.958597 2.393871 -1.4421388 5.210439 6.863944
## YPS606_Mock_Rep3 2.7578618 8.441883 2.463258 -2.1949107 5.172265 6.610721
## YPS606_Mock_Rep4 0.6912945 8.266050 2.514840 -2.2718848 5.102592 6.520798
## YPS606_EtOH_Rep1 3.3881231 13.421082 4.596529 -0.1718948 7.498577 2.560692
## YAL028W YAL036C YAL037W YAL053W
## YPS606_Mock_Rep1 0.57088502 7.529241 0.08734029 7.332505
## YPS606_Mock_Rep2 0.56447336 7.658894 0.49035704 7.160599
## YPS606_Mock_Rep3 0.02355968 7.491845 -0.46143877 7.299935
## YPS606_Mock_Rep4 1.23215081 7.599281 -0.52142280 7.104422
## YPS606_EtOH_Rep1 4.24326006 4.160897 1.77493580 7.000541
We can see now that the rows = treatments and
columns = gene probes. We’re ready to start WGCNA. A
correlation network would be a complete network (all genes would be
connected to all other genes). Hence we need to pick a threshhold value
(if correlation is below threshold, remove the edge). We assume the true
biological network follows a scale-free structure (see papers by Albert
Barabasi).
To do that, WGCNA will try a range of soft thresholds and create a diagnostic plot. This step can take several minutes.
# library(WGCNA)
allowWGCNAThreads(nThreads = 4) # allow multi-threading (optional)
## Allowing multi-threading with up to 4 threads.
#> Allowing multi-threading with up to 4 threads.
# Choose a set of soft-thresholding powers
powers = c(c(1:10), seq(from = 12, to = 20, by = 2))
# Call the network topology analysis function
sft = pickSoftThreshold(
variable_genes_matrix, # <= Input data
#blockSize = 30,
powerVector = powers,
verbose = 5
)
## pickSoftThreshold: will use block size 1439.
## pickSoftThreshold: calculating connectivity for given powers...
## ..working on genes 1 through 1439 of 1439
## Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
## 1 1 0.9360 1.8100 0.945 798.0 875.0 1020
## 2 2 0.7490 0.6570 0.816 559.0 623.0 831
## 3 3 0.4600 0.2840 0.639 428.0 472.0 710
## 4 4 0.0688 0.0803 0.524 344.0 368.0 623
## 5 5 0.0400 -0.0627 0.675 286.0 292.0 558
## 6 6 0.2630 -0.1580 0.831 243.0 238.0 507
## 7 7 0.4550 -0.2320 0.895 210.0 196.0 465
## 8 8 0.6190 -0.2910 0.914 183.0 164.0 430
## 9 9 0.7500 -0.3350 0.947 162.0 137.0 400
## 10 10 0.7980 -0.3840 0.958 145.0 117.0 374
## 11 12 0.8780 -0.4610 0.950 117.0 87.8 331
## 12 14 0.9080 -0.5160 0.949 97.5 67.8 296
## 13 16 0.9270 -0.5630 0.941 82.5 52.7 268
## 14 18 0.9580 -0.6080 0.964 70.7 42.5 245
## 15 20 0.9460 -0.6450 0.942 61.4 34.6 225
Let’s visualize those results
par(mfrow = c(1,2));
cex1 = 0.9;
plot(sft$fitIndices[, 1],
-sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
xlab = "Soft Threshold (power)",
ylab = "Scale Free Topology Model Fit, signed R^2",
main = paste("Scale independence")
)
text(sft$fitIndices[, 1],
-sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
labels = powers, cex = cex1, col = "red"
)
abline(h = 0.90, col = "red")
plot(sft$fitIndices[, 1],
sft$fitIndices[, 5],
xlab = "Soft Threshold (power)",
ylab = "Mean Connectivity",
type = "n",
main = paste("Mean connectivity")
)
text(sft$fitIndices[, 1],
sft$fitIndices[, 5],
labels = powers,
cex = cex1, col = "red")
Pick a soft threshold power near the curve of the plot. We’ll pick
12, but feel free to experiment with other powers to see how it affects
your results. Now we can create the network using the
blockwiseModules command. The blockwiseModule
may take a while to run, since it is constructing the TOM (topological
overlap matrix) and several other steps. While it runs, take a look at
the blockwiseModule documentation (link to [vignette][https://www.rdocumentation.org/packages/WGCNA/versions/1.69/topics/blockwiseModules])
for more information on the parameters. How might you change the
parameters to get more or less modules?
# NOTE: Be sure to run this entire chunk at once
picked_power = 12
cor <- WGCNA::cor # Force it to use WGCNA cor function (fix a namespace conflict issue)
variable_genes_network <- blockwiseModules(variable_genes_matrix, # <= input here
# == Adjacency Function ==
power = picked_power, # <= power here
networkType = "signed",
# == Tree and Block Options ==
deepSplit = 2,
pamRespectsDendro = F,
# detectCutHeight = 0.75,
minModuleSize = 30,
maxBlockSize = 4000,
# == Module Adjustments ==
reassignThreshold = 0,
mergeCutHeight = 0.25,
# == TOM == Archive the run results in TOM file (saves time)
saveTOMs = T,
saveTOMFileBase = "ER",
# == Output Options
numericLabels = F,
verbose = 3)
## Calculating module eigengenes block-wise from all genes
## Flagging genes and samples with too many missing values...
## ..step 1
## ..Working on block 1 .
## TOM calculation: adjacency..
## ..will use 4 parallel threads.
## Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
## ..saving TOM for block 1 into file ER-block.1.RData
## ....clustering..
## ....detecting modules..
## ....calculating module eigengenes..
## ....checking kME in modules..
## ..removing 20 genes from module 1 because their KME is too low.
## ..removing 10 genes from module 2 because their KME is too low.
## ..removing 8 genes from module 3 because their KME is too low.
## ..removing 2 genes from module 4 because their KME is too low.
## ..removing 2 genes from module 5 because their KME is too low.
## ..merging modules that are too close..
## mergeCloseModules: Merging modules whose distance is less than 0.25
## Calculating new MEs...
# return cor() to be the usual command
cor <- stats::cor
Let’s visualize a dendrogram of those modules, each terminal branch corresponds to a single gene.
# Convert labels to colors for plotting
mergedColors_var_genes = labels2colors(variable_genes_network$colors)
# Plot the dendrogram and the module colors underneath
plotDendroAndColors(
variable_genes_network$dendrograms[[1]],
mergedColors_var_genes[variable_genes_network$blockGenes[[1]]],
"Module colors",
dendroLabels = FALSE,
hang = 0.03,
addGuide = TRUE,
guideHang = 0.05 )
Now let’s extract the genes and their coresponding module/color to a data frame
modules_variable_genes_df <- data.frame(
gene_id = names(variable_genes_network$colors),
colors = labels2colors(variable_genes_network$colors)
)
head(modules_variable_genes_df)
## gene_id colors
## 1 YAL004W red
## 2 YAL005C red
## 3 YAL008W red
## 4 YAL016C-A black
## 5 YAL017W red
## 6 YAL025C turquoise
# # pick out a few modules of interest here
# modules_of_interest = c("turquoise", "blue", "brown", "grey")
#
# # Pull out list of genes in that module
# subset_modules_variable_genes_df = modules_variable_genes_df %>%
# subset(colors %in% modules_of_interest)
#
# row.names(modules_variable_genes_df) = modules_variable_genes_df$gene_id
#
# # Get normalized expression for those genes
# subexpr = normalized_counts_variable_genes[subset_modules_variable_genes_df$gene_id,]
#
# # convert to a data frame
# subset_modules_variable_genes_df = data.frame(subexpr) %>%
# mutate(
# gene_id = row.names(.)
# ) %>%
# pivot_longer(-gene_id) %>%
# mutate(
# module = modules_variable_genes_df[gene_id,]$colors
# )
#
# subset_modules_variable_genes_df %>%
# ggplot(., aes(x=name, y=1+value, group=gene_id)) +
# geom_line(aes(color = module),
# alpha = 0.2) +
# theme_bw() +
# theme(
# axis.text.x = element_text(angle = 90)
# ) +
# facet_grid(rows = vars(module),
# # scales = "free_y"
# ) +
# labs(x = "treatment",
# y = "normalized expression") +
# scale_color_identity() #+
# # scale_y_log10()
Plot gene expression correlations in a grid, based on genotype and module membership
(
var_genes_plot <- normalized_counts_variable_genes %>%
data.frame() %>%
mutate(
gene_id = row.names(.)
) %>%
pivot_longer(-gene_id) %>%
mutate(
module = variable_genes_network$colors[gene_id]
) %>%
separate_wider_delim(name, delim = "_", names = c("strain", "stress", "replicate"),cols_remove = FALSE) %>%
ggplot(., aes(x=name, y=value, group=gene_id)) +
geom_line(aes(color = "grey"), size=1,
alpha = 0.2) +
geom_line(aes(color = module),
alpha = 0.2) +
# geom_boxplot(width=0.5,
# aes(group=name, color = module),
# outlier.shape = NA,
# alpha=0.9) +
theme_classic() +
theme(axis.text.x = element_text(angle = 90)) +
facet_grid(rows = vars(module),
cols= vars(strain),
scales = "free"
) +
scale_x_discrete(position = "top") +
labs(x = "treatment",
y = "normalized expression") +
# use color variable as the color
scale_color_identity()
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Now, let’s generate a heatmap of the relationship between the modules
# Get Module Eigengenes per cluster
MEs0 <- moduleEigengenes(variable_genes_matrix, mergedColors_var_genes)$eigengenes
# Reorder modules so similar modules are next to each other
MEs0 <- orderMEs(MEs0)
module_order = names(MEs0) %>% gsub("ME","", .)
# Add treatment names
MEs0$treatment = row.names(MEs0)
# tidy & plot data
mME = MEs0 %>%
pivot_longer(-treatment) %>%
mutate(
name = gsub("ME", "", name),
name = factor(name, levels = module_order)
)
mME %>% ggplot(., aes(x=treatment, y=name, fill=value)) +
geom_tile() +
theme_bw() +
scale_fill_gradient2(
low = "blue",
high = "red",
mid = "white",
midpoint = 0,
limit = c(-1,1)) +
theme(axis.text.x = element_text(angle=90)) +
labs(title = "Module-trait Relationships", y = "Modules", fill="corr")
We can generate a network graph file for analysis in Cytoscape or as an edge/vertices file.
i think this code chunk can be removed
# # we could subset to only certain modules, if we want to.
# genes_of_interest = modules_variable_genes_df #%>%
# # subset(colors %in% modules_of_interest)
#
# # if we subset, this would only get genes in selected modules
# expr_of_interest = normalized_counts_variable_genes[genes_of_interest$gene_id,]
#
# # Only recalculate TOM for modules of interest (faster, altho there's some online discussion if this will be slightly off)
# TOM = TOMsimilarityFromExpr(t(expr_of_interest),
# power = picked_power)
#
# # Add gene names to row and columns
# row.names(TOM) = row.names(expr_of_interest)
# colnames(TOM) = row.names(expr_of_interest)
#
# edge_list_var_genes = data.frame(TOM) %>%
# mutate(
# gene1 = row.names(.)
# ) %>%
# pivot_longer(-gene1) %>%
# dplyr::rename(gene2 = name, correlation = value) %>%
# unique() %>%
# subset(!(gene1==gene2))
#
# head(edge_list_var_genes)
# correlation cutoff
corr_cutoff = 0.3
# first, create adjacency matrix
adjacency = adjacency(variable_genes_matrix, power=picked_power, type="signed")
# Calculate topological overlap matrix and dissimilarity from adjacency matrix
TOM = TOMsimilarity(adjacency, TOMType="signed")
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
# Add gene names to row and columns
rownames(TOM) = AnnotationDbi::select(org.Sc.sgd.db,keys=row.names(adjacency),columns="GENENAME") %>% mutate(label = case_when(
is.na(GENENAME) ~ ORF,
TRUE ~ GENENAME)) %>% pull(label)
## 'select()' returned 1:1 mapping between keys and columns
colnames(TOM) = AnnotationDbi::select(org.Sc.sgd.db,keys=row.names(adjacency),columns="GENENAME") %>% mutate(label = case_when(
is.na(GENENAME) ~ ORF,
TRUE ~ GENENAME)) %>% pull(label)
## 'select()' returned 1:1 mapping between keys and columns
# assign TOM to new variable
adj <- TOM
# only show correlations greater than corr_cutoff
adj[adj > corr_cutoff] = 1
# set all correlations below that threshold to 0
adj[adj != 1] = 0
# graph adjacency network
network_graph_data <- graph.adjacency(adj)
# remove redundant self-loops in the network
network_graph_data <- simplify(network_graph_data)
# add colors for the network graph from constructed network modules
V(network_graph_data)$color <- variable_genes_network$colors
# set plotting margins
par(mar=c(0,0,0.5,0))
# remove unconnected nodes
network_graph_data <- delete.vertices(network_graph_data,
degree(network_graph_data)==0)
plot(network_graph_data,
layout=layout.fruchterman.reingold(network_graph_data, niter=1000),
vertex.size=3,edge.arrow.size=0.1,
vertex.label.cex = 0.01, # increase this value to see gene names
vertex.label.color= "black", rescale=TRUE, ylim=c(-1,1),xlim=c(-1,1),
main = "Network plot for most variable genes")
# create list of genes in each one
variable_geneIDs_module_list <- normalized_counts_variable_genes %>%
data.frame() %>%
mutate(
gene_id = row.names(.)
) %>%
pivot_longer(-gene_id) %>%
mutate(
module = variable_genes_network$colors[gene_id]
) %>%
group_by(module) %>%
# only get genes that are DE
# filter(DE_direction != 0) %>%
group_split() %>% # split into many lists
purrr::set_names(purrr::map_chr(., ~.x$module[1])) %>% # assign names
# get just the ORF names
map(., ~ .x |> pull(all_of("gene_id"))) %>%
# remove duplicates
map(., ~ unique(.))
# run enrichment for each module
GO_enrich_variable_gene_modules <- clusterProfiler::compareCluster(
geneCluster = variable_geneIDs_module_list,
ont = "BP",
OrgDb = org.Sc.sgd.db,
pAdjustMethod = "BH",
pvalueCutoff = 0.5, # high to get for all modules
qvalueCutoff = 0.5,
readable = FALSE,
fun = 'enrichGO',
keyType = 'ORF'
) |>
# let's add a 'richFactor' column that gives us the proportion of genes DE in the term
mutate(richFactor = Count / as.numeric(sub("/\\d+", "", BgRatio)))
# plot those enrichments
GO_enrich_variable_gene_modules %>%
as.data.frame() %>%
group_by(Cluster) %>%
slice_min(order_by=pvalue, n = 5) %>%
ggplot(.,
aes(Cluster, fct_reorder(Description, richFactor))) +
geom_point(aes(color = p.adjust, size = Count)) +
scale_color_gradientn(
colours = c("#f7ca64", "#46bac2", "#7e62a3"),
trans = "log10",
guide = guide_colorbar(reverse = TRUE, order = 1)
) +
scale_size_continuous(range = c(2, 12)) +
scale_x_discrete(position = "top") +
scale_y_discrete(labels = function(x) str_wrap(x, width = 25)) +
xlab("Module") +
ylab(NULL) +
ggtitle("Enriched GO Categories by Module") +
# facet_wrap(~DE, scales = "free_y") +
theme_bw() +
theme(text = element_text(size=14),
axis.text.x = element_text(angle = 90,hjust=0) )
# here are the data to add to cowplot:
GO_summaries_variable_genes <- GO_enrich_variable_gene_modules %>%
as.data.frame() %>%
group_by(Cluster) %>%
slice_min(order_by=p.adjust, n = 5) %>%
slice_head(n = 5) %>%
mutate(Description = case_when(
p.adjust < 0.05 ~ paste0(Description," (",Count,")"),
TRUE ~ " "
)) %>%
summarize(description = str_c(Description, collapse = "\n")) %>%
pull(description)
# draw plot with GO terms
cowplot::ggdraw(var_genes_plot +
theme(plot.margin = margin(t = 0.5, r = 5, b = 0.5, l = 0.1, unit = "cm"))) +
cowplot::draw_text(GO_summaries_variable_genes,
x = 0.76, # Adjust this value for the desired horizontal position
y = rev(seq(0.1, 0.82, length.out = length(GO_summaries_variable_genes))), # Adjust this sequence for the desired vertical positions
size = 6,
hjust = 0, # Align to the left
vjust = 0.5)
Our network so far didn’t use any prior information about genes to include or exclude, just those that are most variable. We do have information about the genes that are considered differentially expressed in their corresponding genotype and environment. What does the network look like if we build it based on those genes that have been identified as differentially expressed?
# set FDR threshold for being considered a DE gene,
# this is applied to the data we are loading in here.
FDR_cutoff <- 0.01
# Load in DE gene summary
DE_genes <- readr::read_delim("https://github.com/clstacy/GenomicDataAnalysis_Fa23/raw/main/data/DE_yeast_TF_stress.txt.gz",
delim = "\t",
escape_double = FALSE,
name_repair = "universal",
show_col_types=FALSE,
trim_ws = TRUE) %>%
dplyr::select(ID, contains("FDR")) %>%
dplyr::select(ID, contains(".v.")) %>%
pivot_longer(-ID, names_to = "contrast", values_to = "FDR") %>%
# only get genes with FDR < 0.01
filter(FDR < FDR_cutoff) %>%
pull(ID) %>%
unique()
## New names:
## • `logFC: WT NaCl response` -> `logFC..WT.NaCl.response`
## • `FDR: WT NaCl response` -> `FDR..WT.NaCl.response`
## • `logFC: msn24 mutant NaCl response` -> `logFC..msn24.mutant.NaCl.response`
## • `FDR: msn24 mutant NaCl response` -> `FDR..msn24.mutant.NaCl.response`
## • `logFC: yap1 mutant NaCl response` -> `logFC..yap1.mutant.NaCl.response`
## • `FDR: yap1 mutant NaCl response` -> `FDR..yap1.mutant.NaCl.response`
## • `logFC: skn7 mutant NaCl response` -> `logFC..skn7.mutant.NaCl.response`
## • `FDR: skn7 mutant NaCl response` -> `FDR..skn7.mutant.NaCl.response`
## • `logFC: WT EtOH response` -> `logFC..WT.EtOH.response`
## • `FDR: WT EtOH response` -> `FDR..WT.EtOH.response`
## • `logFC: msn24 mutant EtOH response` -> `logFC...msn24.mutant.EtOH.response`
## • `FDR: msn24 mutan EtOH response` -> `FDR...msn24.mutan..EtOH.response`
## • `logFC: yap1 mutant EtOH response` -> `logFC...yap1.mutant.EtOH.response`
## • `FDR: yap1 mutant EtOH response` -> `FDR...yap1.mutant.EtOH.response`
## • `logFC: skn7 mutant EtOH response` -> `logFC...skn7.mutant.EtOH.response`
## • `FDR: skn7 mutant EtOH response` -> `FDR...skn7.mutant.EtOH.response`
## • `logFC: WT v msn24 mutant: NaCl response` ->
## `logFC..WT.v.msn24.mutant..NaCl.response`
## • `FDR: WT v msn24 mutant: NaCl response` ->
## `FDR..WT.v.msn24.mutant..NaCl.response`
## • `logFC: WT v yap1 mutant: NaCl response` ->
## `logFC..WT.v.yap1.mutant..NaCl.response`
## • `FDR: WT v yap1 mutant: NaCl response` ->
## `FDR..WT.v.yap1.mutant..NaCl.response`
## • `logFC: WT v skn7 mutant: NaCl response` ->
## `logFC..WT.v.skn7.mutant..NaCl.response`
## • `FDR: WT v skn7 mutant NaCl response` ->
## `FDR..WT.v.skn7.mutant.NaCl.response`
## • `logFC: WT v msn24 mutant: EtOH response` ->
## `logFC..WT.v.msn24.mutant..EtOH.response`
## • `FDR: WT v msn24 mutant: EtOH response` ->
## `FDR..WT.v.msn24.mutant..EtOH.response`
## • `logFC: WT v yap1 mutant: EtOH response` ->
## `logFC..WT.v.yap1.mutant..EtOH.response`
## • `FDR: WT v yap1 mutant: EtOH response` ->
## `FDR..WT.v.yap1.mutant..EtOH.response`
## • `logFC: WT v skn7 mutant: EtOH response` ->
## `logFC..WT.v.skn7.mutant..EtOH.response`
## • `FDR: WT v skn7 mutant: EtOH response` ->
## `FDR..WT.v.skn7.mutant..EtOH.response`
# subset the counts to only be counts for genes that are DE
counts_DE <- counts %>%
rownames_to_column("ORF") %>%
dplyr::filter(ORF %in% DE_genes) %>%
column_to_rownames("ORF")
# normalize with cpm, filter to just DE genes
normalized_counts_DE_genes <- cpm(y,
log = TRUE) %>%
data.frame() %>%
rownames_to_column("ORF") %>%
dplyr::filter(ORF %in% DE_genes) %>%
column_to_rownames("ORF") %>%
as.matrix()
DE_genes_matrix = t(normalized_counts_DE_genes)
What is the correct power for this analysis?
# Choose a set of soft-thresholding powers
powers = c(c(1:10), seq(from = 12, to = 20, by = 2))
# Call the network topology analysis function
sft = pickSoftThreshold(
DE_genes_matrix, # <= Input data
#blockSize = 30,
powerVector = powers,
verbose = 5
)
## pickSoftThreshold: will use block size 1972.
## pickSoftThreshold: calculating connectivity for given powers...
## ..working on genes 1 through 1972 of 1972
## Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
## 1 1 0.9580 2.2300 0.949 1010.0 1090.0 1320
## 2 2 0.9110 0.7890 0.886 671.0 724.0 1040
## 3 3 0.7050 0.2980 0.636 493.0 521.0 868
## 4 4 0.0154 0.0234 -0.155 383.0 390.0 746
## 5 5 0.3260 -0.1420 0.182 308.0 302.0 654
## 6 6 0.6560 -0.2610 0.618 255.0 240.0 581
## 7 7 0.7660 -0.3380 0.729 214.0 193.0 521
## 8 8 0.8500 -0.4020 0.833 183.0 157.0 471
## 9 9 0.9010 -0.4510 0.897 159.0 129.0 429
## 10 10 0.9290 -0.4860 0.928 139.0 108.0 393
## 11 12 0.9520 -0.5480 0.960 109.0 76.6 333
## 12 14 0.9670 -0.6100 0.967 88.1 56.1 290
## 13 16 0.9690 -0.6660 0.964 72.6 42.3 258
## 14 18 0.9670 -0.7070 0.958 60.8 32.5 231
## 15 20 0.9700 -0.7490 0.962 51.6 25.7 210
par(mfrow = c(1,2));
cex1 = 0.9;
plot(sft$fitIndices[, 1],
-sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
xlab = "Soft Threshold (power)",
ylab = "Scale Free Topology Model Fit, signed R^2",
main = paste("Scale independence")
)
text(sft$fitIndices[, 1],
-sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
labels = powers, cex = cex1, col = "red"
)
abline(h = 0.90, col = "red")
plot(sft$fitIndices[, 1],
sft$fitIndices[, 5],
xlab = "Soft Threshold (power)",
ylab = "Mean Connectivity",
type = "n",
main = paste("Mean connectivity")
)
text(sft$fitIndices[, 1],
sft$fitIndices[, 5],
labels = powers,
cex = cex1, col = "red")
It looks like a higher power would better fit this data. Let’s try with
12. You can try changing to a different threshold and see how the
results change.
picked_power = 12
cor <- WGCNA::cor # Force it to use WGCNA cor function (fix a namespace conflict issue)
DE_genes_network <- blockwiseModules(DE_genes_matrix, # <= input here
# == Adjacency Function ==
power = picked_power, # <= power here
networkType = "signed",
# == Tree and Block Options ==
deepSplit = 2,
pamRespectsDendro = F,
# detectCutHeight = 0.75,
minModuleSize = 30,
maxBlockSize = 4000,
# == Module Adjustments ==
reassignThreshold = 0,
mergeCutHeight = 0.25,
# == TOM == Archive the run results in TOM file (saves time)
saveTOMs = T,
saveTOMFileBase = "ER",
# == Output Options
numericLabels = F,
verbose = 3)
## Calculating module eigengenes block-wise from all genes
## Flagging genes and samples with too many missing values...
## ..step 1
## ..Working on block 1 .
## TOM calculation: adjacency..
## ..will use 4 parallel threads.
## Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
## ..saving TOM for block 1 into file ER-block.1.RData
## ....clustering..
## ....detecting modules..
## ....calculating module eigengenes..
## ....checking kME in modules..
## ..removing 10 genes from module 1 because their KME is too low.
## ..removing 5 genes from module 2 because their KME is too low.
## ..removing 4 genes from module 3 because their KME is too low.
## ..removing 3 genes from module 4 because their KME is too low.
## ..removing 3 genes from module 5 because their KME is too low.
## ..merging modules that are too close..
## mergeCloseModules: Merging modules whose distance is less than 0.25
## Calculating new MEs...
# fix namespace issue with cor to return to desired function
cor <- stats::cor
# Convert labels to colors for plotting
mergedColors_DE = labels2colors(DE_genes_network$colors)
# Plot the dendrogram and the module colors underneath
plotDendroAndColors(
DE_genes_network$dendrograms[[1]],
mergedColors_DE[DE_genes_network$blockGenes[[1]]],
"Module colors",
dendroLabels = FALSE,
hang = 0.03,
addGuide = TRUE,
guideHang = 0.05 )
Now let’s extract the genes and their corresponding module/color to a data frame
modules_DE_genes_df <- data.frame(
gene_id = names(variable_genes_network$colors),
colors = labels2colors(variable_genes_network$colors)
)
head(modules_DE_genes_df)
## gene_id colors
## 1 YAL004W red
## 2 YAL005C red
## 3 YAL008W red
## 4 YAL016C-A black
## 5 YAL017W red
## 6 YAL025C turquoise
(
DE_genes_plot <- normalized_counts_DE_genes %>%
data.frame() %>%
mutate(
gene_id = row.names(.)
) %>%
pivot_longer(-gene_id) %>%
mutate(
module = DE_genes_network$colors[gene_id]
) %>%
separate_wider_delim(name, delim = "_", names = c("strain", "stress", "replicate"),cols_remove = FALSE) %>%
ggplot(., aes(x=name, y=value+1, group=gene_id)) +
geom_line(aes(color = "grey"), size=1,
alpha = 0.2) +
geom_line(aes(color = module),
alpha = 0.2) +
# geom_boxplot(width=0.5,
# aes(group=name, color = module),
# outlier.shape = NA,
# alpha=0.9) +
theme_classic() +
theme(axis.text.x = element_text(angle = 90)) +
facet_grid(rows = vars(module),
cols= vars(strain),
scales = "free"
) +
scale_x_discrete(position = "top") +
labs(x = "treatment",
y = "normalized expression") +
# use color variable as the color
scale_color_identity()
)
Generate a heatmap of the eigengenes
# Get Module Eigengenes per cluster
DE_eigengenes <- moduleEigengenes(DE_genes_matrix, mergedColors_DE)$eigengenes
# Reorder modules so similar modules are next to each other
DE_eigengenes <- orderMEs(DE_eigengenes)
# get order of modules without extra naming
module_order = names(DE_eigengenes) %>% gsub("ME","", .)
# Add treatment names
DE_eigengenes$treatment = row.names(DE_eigengenes)
# tidy & plot data
tidy_DE_eigengenes <- DE_eigengenes %>%
pivot_longer(-treatment) %>%
mutate(
name = gsub("ME", "", name),
name = factor(name, levels = module_order)
)
tidy_DE_eigengenes %>%
ggplot(., aes(x=treatment, y=name, fill=value)) +
geom_tile() +
theme_bw() +
scale_fill_gradient2(
low = "blue",
high = "red",
mid = "white",
midpoint = 0,
limit = c(-1,1)) +
theme(axis.text.x = element_text(angle=90)) +
labs(title = "Module-trait Relationships", y = "Modules", fill="corr")
The network file can be generated for Cytoscape or as an edge/vertices file.
genes_of_interest_DE = modules_DE_genes_df #%>%
#subset(colors %in% modules_of_interest)
expr_of_interest_DE = normalized_counts_variable_genes#[genes_of_interest$gene_id,]
# Only recalculate TOM for modules of interest (faster, altho there's some online discussion if this will be slightly off)
TOM_DE = TOMsimilarityFromExpr(t(expr_of_interest_DE),
power = picked_power)
## TOM calculation: adjacency..
## ..will use 4 parallel threads.
## Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
# Add gene names to row and columns
row.names(TOM_DE) = row.names(expr_of_interest_DE)
colnames(TOM_DE) = row.names(expr_of_interest_DE)
edge_list_DE = data.frame(TOM_DE) %>%
mutate(
gene1 = row.names(.)
) %>%
pivot_longer(-gene1) %>%
dplyr::rename(gene2 = name, correlation = value) %>%
unique() %>%
subset(!(gene1==gene2))
head(edge_list_DE)
## # A tibble: 6 Ă— 3
## gene1 gene2 correlation
## <chr> <chr> <dbl>
## 1 YAL004W YAL005C 0.182
## 2 YAL004W YAL008W 0.0689
## 3 YAL004W YAL016C.A 0.00000693
## 4 YAL004W YAL017W 0.162
## 5 YAL004W YAL025C 0.153
## 6 YAL004W YAL028W 0.0835
What does the distribution of correlations look like?
edge_list_DE %>%
ggplot(aes(x=correlation)) +
geom_histogram(bins=60) +
theme_bw()
Let’s create the network based on these DE genes
# correlation cutoff
corr_cutoff = 0.3
# first, create adjacency matrix
adjacency_DE = adjacency(DE_genes_matrix, power=picked_power, type="signed")
# Calculate topological overlap matrix and dissimilarity from adjacency matrix
TOM_DE = TOMsimilarity(adjacency_DE, TOMType="signed")
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
# Add gene names to row and columns
rownames(TOM_DE) = AnnotationDbi::select(org.Sc.sgd.db,keys=row.names(adjacency_DE),columns="GENENAME") %>% mutate(label = case_when(
is.na(GENENAME) ~ ORF,
TRUE ~ GENENAME)) %>% pull(label)
## 'select()' returned 1:1 mapping between keys and columns
colnames(TOM_DE) = AnnotationDbi::select(org.Sc.sgd.db,keys=row.names(adjacency_DE),columns="GENENAME") %>% mutate(label = case_when(
is.na(GENENAME) ~ ORF,
TRUE ~ GENENAME)) %>% pull(label)
## 'select()' returned 1:1 mapping between keys and columns
# assign TOM to new variable
adj_DE <- TOM_DE
# only show correlations greater than corr_cutoff
adj_DE[adj_DE > corr_cutoff] = 1
# set all correlations below that threshold to 0
adj_DE[adj_DE != 1] = 0
# graph adjacency network
network_graph_data_DE <- graph.adjacency(adj_DE)
# remove redundant self-loops in the network
network_graph_data_DE <- simplify(network_graph_data_DE)
# add colors for the network graph from constructed network modules
V(network_graph_data_DE)$color <- DE_genes_network$colors
# remove unconnected nodes
network_graph_data_DE <- delete.vertices(network_graph_data_DE,
degree(network_graph_data_DE)==0)
# set plotting margins
par(mar=c(0.5,0.5,0.5,0.5))
# generate plot
plot(network_graph_data_DE,
layout=layout.fruchterman.reingold(network_graph_data_DE, niter=1000),
vertex.size=3, edge.arrow.size=0.1,
vertex.label.cex = 0.01, # increase this value to see gene names
vertex.label.color= "black", rescale=TRUE, ylim=c(-1,1),xlim=c(-1,1),
main = "Network plot for differentially expressed genes")
Let’s do GO enrichments on these terms as well.
# create list of genes in each one
DE_geneIDs_module_list <- normalized_counts_DE_genes %>%
data.frame() %>%
mutate(
gene_id = row.names(.)
) %>%
pivot_longer(-gene_id) %>%
mutate(
module = DE_genes_network$colors[gene_id]
) %>%
group_by(module) %>%
# only get genes that are DE
# filter(DE_direction != 0) %>%
group_split() %>% # split into many lists
purrr::set_names(purrr::map_chr(., ~.x$module[1])) %>% # assign names
# get just the ORF names
map(., ~ .x |> pull(all_of("gene_id"))) %>%
# remove duplicates
map(., ~ unique(.))
GO_enrich_DE_gene_modules <- clusterProfiler::compareCluster(
geneCluster = DE_geneIDs_module_list,
ont = "BP",
OrgDb = org.Sc.sgd.db,
pAdjustMethod = "BH",
pvalueCutoff = 0.5,
qvalueCutoff = 0.5,
readable = FALSE,
fun = 'enrichGO',
keyType = 'ORF'
) |>
# let's add a 'richFactor' column that gives us the proportion of genes DE in the term
mutate(richFactor = Count / as.numeric(sub("/\\d+", "", BgRatio)))
# get rid of sticks to save space
GO_enrich_DE_gene_modules %>%
as.data.frame() %>%
group_by(Cluster) %>%
slice_min(order_by=pvalue, n = 5) %>%
ggplot(.,
aes(Cluster, fct_reorder(Description, richFactor))) +
geom_point(aes(color = p.adjust, size = Count)) +
scale_color_gradientn(
colours = c("#f7ca64", "#46bac2", "#7e62a3"),
trans = "log10",
guide = guide_colorbar(reverse = TRUE, order = 1)
) +
scale_size_continuous(range = c(2, 12)) +
scale_x_discrete(position = "top") +
scale_y_discrete(labels = function(x) str_wrap(x, width = 25)) +
xlab("Module") +
ylab(NULL) +
ggtitle("Enriched GO Categories by Module") +
# facet_wrap(~DE, scales = "free_y") +
theme_bw() +
theme(text = element_text(size=14),
axis.text.x = element_text(angle = 90,hjust=0) )
# here are the data to add to cowplot:
GO_summaries_DE_genes <- GO_enrich_DE_gene_modules %>%
as.data.frame() %>%
group_by(Cluster) %>%
slice_min(order_by=p.adjust, n = 5) %>%
slice_head(n = 5) %>%
mutate(Description = case_when(
p.adjust < 0.05 ~ paste0(Description," (",Count,")"),
TRUE ~ " "
)) %>%
summarize(description = str_c(Description, collapse = "\n")) %>%
pull(description)
# draw plot with GO terms
cowplot::ggdraw(DE_genes_plot +
theme(plot.margin = margin(t = 0.5, r = 5, b = 0.5, l = 0.1, unit = "cm"))) +
cowplot::draw_text(GO_summaries_DE_genes,
x = 0.76, # Adjust this value for the desired horizontal position
y = rev(seq(0.05, 0.82, length.out = length(GO_summaries_DE_genes))), # Adjust this sequence for the desired vertical positions
size = 6,
hjust = 0, # Align GO terms to the left
vjust = 0.5)
Question 1:
Be sure to knit this file into a pdf or html file once you’re finished.
System information for reproducibility:
pander::pander(sessionInfo())
R version 4.3.1 (2023-06-16)
Platform: aarch64-apple-darwin20 (64-bit)
locale: en_US.UTF-8||en_US.UTF-8||en_US.UTF-8||C||en_US.UTF-8||en_US.UTF-8
attached base packages: stats4, stats, graphics, grDevices, utils, datasets, methods and base
other attached packages: igraph(v.1.5.1), cowplot(v.1.1.1), org.Sc.sgd.db(v.3.18.0), AnnotationDbi(v.1.64.1), IRanges(v.2.36.0), S4Vectors(v.0.40.1), Biobase(v.2.62.0), BiocGenerics(v.0.48.1), matrixStats(v.1.0.0), edgeR(v.4.0.1), limma(v.3.58.1), WGCNA(v.1.72-1), fastcluster(v.1.2.3), dynamicTreeCut(v.1.63-1), reactable(v.0.4.4), Glimma(v.2.12.0), statmod(v.1.5.0), BiocManager(v.1.30.22), pander(v.0.6.5), knitr(v.1.45), lubridate(v.1.9.3), forcats(v.1.0.0), stringr(v.1.5.0), dplyr(v.1.1.3), purrr(v.1.0.2), readr(v.2.1.4), tidyr(v.1.3.0), tibble(v.3.2.1), ggplot2(v.3.4.4), tidyverse(v.2.0.0) and pacman(v.0.5.1)
loaded via a namespace (and not attached): splines(v.4.3.1), later(v.1.3.1), ggplotify(v.0.1.2), bitops(v.1.0-7), filelock(v.1.0.2), polyclip(v.1.10-6), preprocessCore(v.1.64.0), rpart(v.4.1.21), lifecycle(v.1.0.3), doParallel(v.1.0.17), lattice(v.0.22-5), vroom(v.1.6.4), MASS(v.7.3-60), backports(v.1.4.1), magrittr(v.2.0.3), Hmisc(v.5.1-1), sass(v.0.4.7), rmarkdown(v.2.25), jquerylib(v.0.1.4), yaml(v.2.3.7), httpuv(v.1.6.12), RColorBrewer(v.1.1-3), DBI(v.1.1.3), abind(v.1.4-5), zlibbioc(v.1.48.0), GenomicRanges(v.1.54.1), ggraph(v.2.1.0), RCurl(v.1.98-1.13), yulab.utils(v.0.1.0), nnet(v.7.3-19), tweenr(v.2.0.2), rappdirs(v.0.3.3), GenomeInfoDbData(v.1.2.11), enrichplot(v.1.22.0), ggrepel(v.0.9.4), tidytree(v.0.4.5), codetools(v.0.2-19), DelayedArray(v.0.28.0), ggforce(v.0.4.1), DOSE(v.3.28.0), tidyselect(v.1.2.0), aplot(v.0.2.2), farver(v.2.1.1), viridis(v.0.6.4), BiocFileCache(v.2.10.1), base64enc(v.0.1-3), jsonlite(v.1.8.7), ellipsis(v.0.3.2), tidygraph(v.1.2.3), Formula(v.1.2-5), survival(v.3.5-7), iterators(v.1.0.14), foreach(v.1.5.2), tools(v.4.3.1), treeio(v.1.26.0), HPO.db(v.0.99.2), Rcpp(v.1.0.11), glue(v.1.6.2), gridExtra(v.2.3), SparseArray(v.1.2.2), xfun(v.0.41), DESeq2(v.1.42.0), qvalue(v.2.34.0), MatrixGenerics(v.1.14.0), GenomeInfoDb(v.1.38.0), withr(v.2.5.2), fastmap(v.1.1.1), fansi(v.1.0.5), digest(v.0.6.33), gridGraphics(v.0.5-1), timechange(v.0.2.0), R6(v.2.5.1), mime(v.0.12), colorspace(v.2.1-0), GO.db(v.3.18.0), RSQLite(v.2.3.2), utf8(v.1.2.4), generics(v.0.1.3), data.table(v.1.14.8), graphlayouts(v.1.0.1), httr(v.1.4.7), htmlwidgets(v.1.6.2), S4Arrays(v.1.2.0), scatterpie(v.0.2.1), pkgconfig(v.2.0.3), gtable(v.0.3.4), blob(v.1.2.4), impute(v.1.76.0), XVector(v.0.42.0), shadowtext(v.0.1.2), clusterProfiler(v.4.10.0), htmltools(v.0.5.7), fgsea(v.1.28.0), scales(v.1.2.1), png(v.0.1-8), ggfun(v.0.1.3), rstudioapi(v.0.15.0), tzdb(v.0.4.0), reshape2(v.1.4.4), nlme(v.3.1-163), checkmate(v.2.3.0), curl(v.5.1.0), cachem(v.1.0.8), BiocVersion(v.3.18.0), parallel(v.4.3.1), HDO.db(v.0.99.1), foreign(v.0.8-85), pillar(v.1.9.0), grid(v.4.3.1), vctrs(v.0.6.4), promises(v.1.2.1), dbplyr(v.2.4.0), xtable(v.1.8-4), cluster(v.2.1.4), htmlTable(v.2.4.2), evaluate(v.0.23), cli(v.3.6.1), locfit(v.1.5-9.8), compiler(v.4.3.1), rlang(v.1.1.1), crayon(v.1.5.2), labeling(v.0.4.3), fs(v.1.6.3), plyr(v.1.8.9), stringi(v.1.7.12), viridisLite(v.0.4.2), BiocParallel(v.1.36.0), MPO.db(v.0.99.7), munsell(v.0.5.0), Biostrings(v.2.70.1), lazyeval(v.0.2.2), GOSemSim(v.2.28.0), Matrix(v.1.6-1.1), patchwork(v.1.1.3), hms(v.1.1.3), bit64(v.4.0.5), KEGGREST(v.1.42.0), shiny(v.1.7.5.1), SummarizedExperiment(v.1.32.0), interactiveDisplayBase(v.1.40.0), highr(v.0.10), AnnotationHub(v.3.10.0), memoise(v.2.0.1), bslib(v.0.5.1), ggtree(v.3.10.0), fastmatch(v.1.1-4), bit(v.4.0.5), gson(v.0.1.0) and ape(v.5.7-1)